# Assumes hire_date is formatted as "yyyy-MM-dd"
from pyspark.sql.functions import to_date  
# Convert the hire_date column from a string into a date type.
hire_date_format = "yyyy-MM-dd"
parsed_hire_date = to_date(df["hire_date"], hire_date_format)
df_with_date = df.withColumn("hire_date", parsed_hire_date)
df_with_date.printSchema()

# Average of the age column over the whole DataFrame (no grouping); the
# value is read from the first column of the first collected row.
age_stats = df_with_date.agg({"age": "avg"})
average_age = age_stats.collect()[0][0]
print(f"The average age of employees is: {average_age}")

# Row count for each distinct value of the gender column.
gender_count = df_with_date.groupBy("gender").count()
gender_count.show()